library(corrplot) # for correlation matix
library(stargazer) # visualize model fit
library(skimr) # generate summary statistics
library(car) # statistics
library(lmtest) # linear modeling
library(sandwich)
library(tidyverse) # for data import, manipulation, viz
filter <- dplyr::filter
lm(log(crmrte) ~ log(eff_cj) + log(avgsen) + log(density) + mix + pctymle + pctmin80 + west + central + urban + log(taxpc) , data = data2)
Error in eval(predvars, data, env) : object 'eff_cj' not found
data2$acj = data2$prbarr * data2$prbconv * data2$prbpris
data2$west_proper = ifelse(data2$west == 1 & data2$central != 1, 1, 0)
df = data2 %>%
mutate(
acj = prbarr * prbconv * prbpris
, west_proper = ifelse(west == 1 & central != 1, 1, 0)
, central_proper = ifelse(west != 1 & central == 1, 1, 0)
, num_minorities = density * pctmin80
, num_ymales = density * pctymle
) %>%
mutate(
ln_crmrte = log(crmrte)
, ln_acj = log(prbarr * prbconv * polpc)
, ln_wloc = log(wloc)
, ln_wtrd = log(wtrd)
, ln_density = log(density)
, ln_taxpc = log(taxpc)
, ln_pctymle = log(pctymle)
) %>%
mutate(
acj = prbarr * prbconv * polpc
, west_proper = ifelse(west == 1 & central != 1, 1, 0)
, central_proper = ifelse(west != 1 & central == 1, 1, 0)
) %>%
select(
ln_crmrte
, ln_acj
, ln_wloc
, ln_wtrd
, ln_taxpc
, ln_density
, pctmin80
, ln_pctymle
, num_minorities
, num_ymales
, urban
, west_proper
)
cor(df)
ln_crmrte ln_acj ln_wloc ln_wtrd ln_taxpc ln_density pctmin80 ln_pctymle num_minorities num_ymales urban west_proper
ln_crmrte 1.0000000 -0.39933207 0.30286706 0.38965891 0.33984322 0.49364251 0.23291821 0.31175403 0.6635139 0.66759236 0.49146445 -0.45141763
ln_acj -0.3993321 1.00000000 0.18125219 -0.04015954 -0.02452981 -0.25189220 0.05632914 -0.24887027 -0.1643648 -0.25482183 -0.16861495 0.15187249
ln_wloc 0.3028671 0.18125219 1.00000000 0.57419433 0.22092405 0.30295286 -0.10213445 0.02258912 0.3918107 0.43137206 0.33004924 -0.15296513
ln_wtrd 0.3896589 -0.04015954 0.57419433 1.00000000 0.16350047 0.42921817 -0.07527956 -0.09784808 0.4839622 0.48656695 0.39889779 -0.19939002
ln_taxpc 0.3398432 -0.02452981 0.22092405 0.16350047 1.00000000 0.10798692 0.02947733 -0.07360943 0.3520511 0.28828811 0.39785937 -0.19215618
ln_density 0.4936425 -0.25189220 0.30295286 0.42921817 0.10798692 1.00000000 -0.09668387 0.17515611 0.4816689 0.56497500 0.39272648 -0.25035461
pctmin80 0.2329182 0.05632914 -0.10213445 -0.07527956 0.02947733 -0.09668387 1.00000000 -0.01224664 0.2848787 -0.05069759 0.01619569 -0.62451443
ln_pctymle 0.3117540 -0.24887027 0.02258912 -0.09784808 -0.07360943 0.17515611 -0.01224664 1.00000000 0.1849915 0.40960966 0.12927006 -0.05212782
num_minorities 0.6635139 -0.16436478 0.39181075 0.48396222 0.35205109 0.48166887 0.28487870 0.18499148 1.0000000 0.88003337 0.80710870 -0.36836902
num_ymales 0.6675924 -0.25482183 0.43137206 0.48656695 0.28828811 0.56497500 -0.05069759 0.40960966 0.8800334 1.00000000 0.81288832 -0.20665165
urban 0.4914645 -0.16861495 0.33004924 0.39889779 0.39785937 0.39272648 0.01619569 0.12927006 0.8071087 0.81288832 1.00000000 -0.08000341
west_proper -0.4514176 0.15187249 -0.15296513 -0.19939002 -0.19215618 -0.25035461 -0.62451443 -0.05212782 -0.3683690 -0.20665165 -0.08000341 1.00000000
Base model
base_md = lm(log(crmrte) ~ log(acj), data = data2)
plot(base_md)
H0: demographic factors do not have an impact on the model fit
md1 = lm(log(crmrte) ~ log(acj) + log(density) + log(pctymle) + pctmin80, data = data2)
plot(md1)
coeftest(md1)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.9979154 0.5977845 -8.3607 1.061e-12 ***
log(acj) -0.4636576 0.0607214 -7.6358 3.043e-11 ***
log(density) 0.1787405 0.0269804 6.6248 2.999e-09 ***
log(pctymle) 0.0873188 0.1990884 0.4386 0.6621
pctmin80 0.0121271 0.0021797 5.5635 3.008e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
coeftest(md1, vcov = vcovHC)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.9979154 0.5727402 -8.7263 1.934e-13 ***
log(acj) -0.4636576 0.1495965 -3.0994 0.00263 **
log(density) 0.1787405 0.2364262 0.7560 0.45173
log(pctymle) 0.0873188 0.1534316 0.5691 0.57079
pctmin80 0.0121271 0.0023181 5.2315 1.192e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Density and minority percetange are significant on explaing the variation on crime rate. Bot variables have a positive relationship with crime rate. % of young males does not explain the variation of crime rate.
ggplot(data2, aes(x= pctymle*density , y=log(crmrte + 1))) +
geom_point()+
geom_smooth(method=lm, se=FALSE)
interac_md2 = lm(log(crmrte) ~ log(acj) + pctymle*density + pctmin80, data = data2)
plot(interac_md2)
coeftest(interac_md2, vcov= vcovHC)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.3111217 0.3238046 -16.4022 < 2.2e-16 ***
log(acj) -0.3427713 0.1046721 -3.2747 0.001537 **
pctymle 2.4606492 2.4707490 0.9959 0.322153
density 0.1974639 0.1243659 1.5878 0.116097
pctmin80 0.0111803 0.0022048 5.0710 2.328e-06 ***
pctymle:density -0.2173066 1.3803559 -0.1574 0.875285
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
md3 = lm(log(crmrte) ~ log(acj) + log(density*pctmin80) + log(pctymle), data = data2)
plot(md3)
coeftest(md3, vcov.= vcovHC)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.030762 0.513855 -9.7902 1.214e-15 ***
log(acj) -0.411609 0.104502 -3.9388 0.000166 ***
log(density * pctmin80) 0.193798 0.086934 2.2293 0.028404 *
log(pctymle) 0.112892 0.132557 0.8516 0.396775
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
interact_md4 = lm(log(crmrte) ~ log(acj) + density*pctmin80 + log(pctymle), data = data2)
plot(interact_md4)
se = sqrt(diag(vcovHC(base_md)))
se1 = sqrt(diag(vcovHC(md1)))
se2 = sqrt(diag(vcovHC(interac_md2)))
se3 = sqrt(diag(vcovHC(md3)))
se4 = sqrt(diag(vcovHC(interact_md4)))
# se
se1
(Intercept) log(acj) log(density) log(pctymle) pctmin80
0.572740198 0.149596487 0.236426216 0.153431604 0.002318106
stargazer(
base_md
, md1
, interac_md2
, md3
, interact_md4
, type = "text"
, se = list(se, se1, se2, se3, se4)
, star.cutoffs = c(0.05, 0.01, 0.001)
)
length of NULL cannot be changedlength of NULL cannot be changedlength of NULL cannot be changedlength of NULL cannot be changednumber of rows of result is not a multiple of vector length (arg 2)number of rows of result is not a multiple of vector length (arg 2)number of rows of result is not a multiple of vector length (arg 2)number of rows of result is not a multiple of vector length (arg 2)
==========================================================================================================================================
Dependent variable:
------------------------------------------------------------------------------------------------------------------
log(crmrte)
(1) (2) (3) (4) (5)
------------------------------------------------------------------------------------------------------------------------------------------
log(acj) -0.470*** -0.464** -0.343** -0.412*** -0.341**
(0.106) (0.150) (0.105) (0.105) (0.104)
log(density) 0.179
(0.236)
log(density * pctmin80) 0.194*
(0.087)
log(pctymle) 0.087 0.113 0.273
(0.153) (0.133) (0.160)
pctymle 2.461
(2.471)
density:pctmin80 -0.002
(0.003)
density 0.197 0.232**
(0.124) (0.083)
pctmin80 0.012*** 0.011*** 0.013***
(0.002) (0.002) (0.004)
pctymle:density -0.217
(1.380)
Constant -4.937*** -4.998*** -5.311*** -5.031*** -4.467***
(0.296) (0.573) (0.324) (0.514) (0.575)
------------------------------------------------------------------------------------------------------------------------------------------
Observations 90 90 90 90 90
R2 0.315 0.629 0.650 0.663 0.652
Adjusted R2 0.307 0.612 0.629 0.651 0.631
Residual Std. Error 0.457 (df = 88) 0.342 (df = 85) 0.334 (df = 84) 0.324 (df = 86) 0.333 (df = 84)
F Statistic 40.481*** (df = 1; 88) 36.066*** (df = 4; 85) 31.211*** (df = 5; 84) 56.390*** (df = 3; 86) 31.479*** (df = 5; 84)
==========================================================================================================================================
Note: *p<0.05; **p<0.01; ***p<0.001
demo_md1 = lm(log(crmrte) ~ log(acj) + pctmin80, data = data2)
plot(demo_md1)
demo_md2 = lm(log(crmrte) ~ log(acj) + log(density * pctmin80), data = data2)
plot(demo_md2)
s2_demo1 = sqrt(diag(vcovHC(demo_md1)))
s2_demo2 = sqrt(diag(vcovHC(demo_md2)))
stargazer(
base_md
, demo_md1
, demo_md2
, type = "text"
, se = list(se, s2_demo1, s2_demo2)
, star.cutoffs = c(0.05, 0.01, 0.001)
)
length of NULL cannot be changedlength of NULL cannot be changedlength of NULL cannot be changedlength of NULL cannot be changednumber of rows of result is not a multiple of vector length (arg 2)number of rows of result is not a multiple of vector length (arg 2)
============================================================================================
Dependent variable:
--------------------------------------------------------------------
log(crmrte)
(1) (2) (3)
--------------------------------------------------------------------------------------------
log(acj) -0.470*** -0.521*** -0.424***
(0.106) (0.093) (0.097)
pctmin80 0.011***
(0.002)
log(density * pctmin80) 0.195*
(0.087)
Constant -4.937*** -5.375*** -5.354***
(0.296) (0.249) (0.227)
--------------------------------------------------------------------------------------------
Observations 90 90 90
R2 0.315 0.430 0.662
Adjusted R2 0.307 0.416 0.654
Residual Std. Error 0.457 (df = 88) 0.419 (df = 87) 0.323 (df = 87)
F Statistic 40.481*** (df = 1; 88) 32.759*** (df = 2; 87) 85.029*** (df = 2; 87)
============================================================================================
Note: *p<0.05; **p<0.01; ***p<0.001
geo_md1 = lm(log(crmrte) ~ log(acj) + log(density * pctmin80) + west_proper, data = data2)
plot(geo_md1)
geo_md2 = lm(log(crmrte) ~ log(acj) + log(density * pctmin80) + west_proper + urban, data = data2)
plot(geo_md2)
se_geo1 = sqrt(diag(vcovHC(geo_md1)))
se_geo2 = sqrt(diag(vcovHC(geo_md2)))
stargazer(
demo_md2
, geo_md1
, geo_md2
, type = "text"
, se = list(s2_demo2, se_geo1, se_geo2)
, star.cutoffs = c(0.05, 0.01, 0.001)
)
length of NULL cannot be changedlength of NULL cannot be changedlength of NULL cannot be changedlength of NULL cannot be changednumber of rows of result is not a multiple of vector length (arg 2)number of rows of result is not a multiple of vector length (arg 2)
============================================================================================
Dependent variable:
--------------------------------------------------------------------
log(crmrte)
(1) (2) (3)
--------------------------------------------------------------------------------------------
log(acj) -0.424*** -0.423*** -0.385***
(0.097) (0.100) (0.087)
log(density * pctmin80) 0.195* 0.185 0.157
(0.087) (0.138) (0.161)
west_proper -0.062 -0.123
(0.248) (0.295)
urban 0.302
(0.294)
Constant -5.354*** -5.308*** -5.127***
(0.227) (0.347) (0.495)
--------------------------------------------------------------------------------------------
Observations 90 90 90
R2 0.662 0.663 0.681
Adjusted R2 0.654 0.651 0.666
Residual Std. Error 0.323 (df = 87) 0.324 (df = 86) 0.317 (df = 85)
F Statistic 85.029*** (df = 2; 87) 56.375*** (df = 3; 86) 45.339*** (df = 4; 85)
============================================================================================
Note: *p<0.05; **p<0.01; ***p<0.001
eco_md1 = lm(log(crmrte) ~ log(acj) + log(density * pctmin80) + log(taxpc), data = data2)
plot(eco_md1)
se_eco1 = sqrt(diag(vcovHC(eco_md1)))
stargazer(
demo_md2
, eco_md1
, type = "text"
, se = list(s2_demo2, se_eco1)
, star.cutoffs = c(0.05, 0.01, 0.001)
)
length of NULL cannot be changedlength of NULL cannot be changedlength of NULL cannot be changedlength of NULL cannot be changednumber of rows of result is not a multiple of vector length (arg 2)
=====================================================================
Dependent variable:
---------------------------------------------
log(crmrte)
(1) (2)
---------------------------------------------------------------------
log(acj) -0.424*** -0.398***
(0.097) (0.089)
log(density * pctmin80) 0.195* 0.190*
(0.087) (0.085)
log(taxpc) 0.254
(0.186)
Constant -5.354*** -6.178***
(0.227) (0.661)
---------------------------------------------------------------------
Observations 90 90
R2 0.662 0.675
Adjusted R2 0.654 0.664
Residual Std. Error 0.323 (df = 87) 0.318 (df = 86)
F Statistic 85.029*** (df = 2; 87) 59.628*** (df = 3; 86)
=====================================================================
Note: *p<0.05; **p<0.01; ***p<0.001
mix_md1 = lm(log(crmrte) ~ log(acj) + log(density * pctmin80) + log(taxpc), data = data2)
plot(mix_md1)